*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*	This program estimates reduced form and OLS VAM sampling covariances.
*	----------------------------------------------------------------------------
	args sample sch_res	ptype bw
	tokenize `sample', parse("_")
	local years "`5'`6'`7'`8'"

	// pick VAMs to estimate
	local vams unc conv risk rcvam conv_3yrlag

	// pick outcome
	local y m

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


*	load analysis file

	use "${builddata}${city}_analysisfile`sample'`sch_res'`bw'", clear
	merge m:1 sch year using "${builddata}${city}_schoolcovs", nogen keep(1 3)
	if inlist("${city}","NYC","NYCms") {
		ren screened screened_old
		qui mdesc screened
		assert r(miss) == 0
		egen any_scr = max(screened), by(sch)
		replace screened = any_scr
	}

	* rename older lagged scores
	if "${city}"=="NYC" ren (bl_ss_math_6 bl_ss_ela_6) (bl_ss_math_lag bl_ss_ela_lag)
	if "${city}"=="NYCms" ren (bl_ss_math_3 bl_ss_ela_3) (bl_ss_math_lag bl_ss_ela_lag)

	preserve

	ivset, ptype(`ptype')
		local Dfull = r(D)
		local S = r(S)
		local p = r(P)

	if "${city}" == "NYC" {
		drop enr_4555
		drop enr_2337
		drop enr_2583
		drop enr_3459
		drop enr_3552
		drop enr_6334
		drop enr_6365
		drop enr_6403
		drop enr_6524
		drop enr_6564
		drop enr_6388
	}
	if "${city}" == "NYCms" {
		drop enr_6593
		drop enr_3349
	}

	//need schools to be represented in the lottery sample, or else first step regs have no variance ests.
	_rmcoll `Dfull' if `S', forcedrop nocons
	local D = r(varlist)

	gen tag = 0
	foreach var of varlist `D'{
		local sch = substr("`var'",-4,.)
		replace tag = 1 if sch == `sch'
	}
	//lottery school with only one obs in the lotto samp
	keep if tag

	keep sch lottery_`ptype'_sch $sectors omitted
	duplicates drop

	//lotteries first
	gsort omitted -lottery_`ptype'_sch sch
	gen newnum = _n

	replace lottery_`ptype'_sch = 0 if omitted

	//count schools
	local J=_N
	count if lottery_`ptype'_sch
	local L=r(N)

	//store ordering in locals
	forval i=1/`J'{
		qui sum sch if newnum == `i'
		local oldnum_`i' = r(mean)
	}

***	stack key

	order lottery_`ptype'_sch $sectors
	drop sch newnum

	restore

*	enrollment

	forval j = 1/`J'{
		if "${city}"=="NYC" gen vam_`j' = enr_`oldnum_`j''
		if "${city}"=="NYCms" gen vam_`j' = enr_`oldnum_`j''
	}

*	offer dummies

	forval l = 1/`L'{
		gen rf_off_`l' = offer_`oldnum_`l''
	}

***	estimate

	ivset, y(`y') ptype(`ptype')
		local p = r(P)
		local S = r(S)
		local Y = r(Y)

	//keep relevant vars and put rv controls in local
	keep stu vam_* rf_* lottery_form* pscore* rv_* year grade $bl_demos $bl_scores `S' math bl_*
	local rv rv_*

	//order
	order vam_* rf_* , sequential

*	set controls

	* save file that can be loaded in each iteration
	tempfile temp
	sa `temp', replace

	foreach vam in `vams' {

			* load file
			u `temp', clear

			if "`vam'" == "unc" local controls i.year##i.grade
			if "`vam'" == "conv" local controls $iv_controls
			if "`vam'" == "risk" local controls `p'
			if "`vam'" == "rcvam" local controls $iv_controls `p'

			if !inlist("`vam'","conv_3yrlag","rcvam_3yrlag") {
				* estimate VAM
				preserve
					g ind = !`S'
					so ind stu
					qui _regress `Y' vam_* `controls', nocons
					local N_vam = e(N)
					local df_vam = e(df_m)
					predict vam_resids, r
					mata vam_X = st_data(., ("vam_* `controls'"))
					mata vam_e = st_data(.,"vam_resids")
				restore

				* estimate reduced forms
				preserve
					keep if `S'
					so stu
					qui _regress `Y' rf_* `p' $iv_controls
					local N_rf = e(N)
					local df_rf = e(df_m)
					predict rf_resids, r
					g cons = 1
					mata rf_X = st_data(., ("rf_* `p' $iv_controls cons"))
					mata rf_e = st_data(.,"rf_resids")
				restore

				* fill out RF
				mata rows = rows(vam_X) - rows(rf_X)
				mata add = J(rows,cols(rf_X),0)
				mata rf_X_full = rf_X \ add
				mata add = J(rows,1,0)
				mata rf_e_full = rf_e \ add

				* estimate covariance
				mata vam_cov = invsym(vam_X'*vam_X)*(vam_X'*rf_X_full)*invsym(rf_X_full'*rf_X_full)*(vam_e'*rf_e_full)/(`N_vam'-`df_rf'-`df_vam')
				mata vam_cov_JL = vam_cov[1..`J',1..`L']
				mata st_matrix("vam_cov_JL",vam_cov_JL)
				mata st_matrix("vam_cov",vam_cov)

				* output
				preserve
					clear
					svmat vam_cov_JL
					export delimited "${builddata}${city}_V_alpharho_`vam'_JL_new_`y'.csv", replace novarnames
					clear
					svmat vam_cov
					export delimited "${builddata}${city}_V_alpharho_`vam'_new_`y'.csv", replace novarnames
				restore

			}
			else {
				* estimate reduced forms
				preserve
					merge 1:1 stu using "${builddata}${city}_analysisfile`sample'`sch_res'`bw'_older_bl", keep(1 3)
					g merged = (_merge == 3)
					keep if `S'
					gsort -merged stu
					qui count if merged == 0
					local unmerged = r(N)
					qui count if merged == 1
					local merged = r(N)

			 		qui _regress `Y' rf_* `p' $iv_controls
					local N_rf = e(N)
					local df_rf = e(df_m)
					predict rf_resids, r
					g cons = 1
					mata rf_X = st_data(., ("rf_* `p' $iv_controls cons"))
					mata rf_e = st_data(.,"rf_resids")
				restore

				* estimate VAM
				preserve
					u "${builddata}${city}_analysisfile`sample'`sch_res'`bw'_older_bl", clear

					//enrollment
					forval j = 1/`J'{
						gen vam_`j' = enr_`oldnum_`j''
					}

					ivset, y(`y') ptype(`ptype')
						local Y=r(Y)
						local p=r(P)

					merge 1:1 stu using "${builddata}${city}_analysisfile`sample'`sch_res'`bw'", keep(1 3)
					g merged = (_merge == 3)

					gsort -merged stu
					if "`vam'" == "conv_3yrlag" local controls $iv_controls
					if "`vam'" == "rcvam_3yrlag" local controls $iv_controls `p'
					qui _regress `Y' vam_* `controls', nocons
					local N_vam = e(N)
					local df_vam = e(df_m)
					predict vam_resids, r
					mata vam_X = st_data(., ("vam_* `controls'"))
					mata vam_e = st_data(.,"vam_resids")

				restore

				* fill out RF
				mata rows = rows(vam_X) + `unmerged' - rows(rf_X)
				mata add = J(rows,cols(rf_X),0)
				mata rf_X_full = rf_X \ add
				mata add = J(rows,1,0)
				mata rf_e_full = rf_e \ add

				* fill out VAM
				* sandwich zeros for kids who have older lags but not in RF estimation sample
				mata top = vam_X[1..`merged',1..cols(vam_X)]
				mata bot = vam_X[`=`merged'+1'..rows(vam_X),1..cols(vam_X)]
				mata delta = J(`unmerged',cols(vam_X),0)
				mata vam_X = top \ delta \ bot
				mata top = vam_e[1..`merged',1]
				mata bot = vam_e[`=`merged'+1'..rows(vam_e),1]
				mata delta = J(`unmerged',1,0)
				mata vam_e = top \ delta \ bot

				* estimate covariance
				mata vam_cov = invsym(vam_X'*vam_X)*(vam_X'*rf_X_full)*invsym(rf_X_full'*rf_X_full)*(vam_e'*rf_e_full)/(`N_vam'-`df_rf'-`df_vam')
				mata vam_cov_JL = vam_cov[1..`J',1..`L']
				mata st_matrix("vam_cov_JL",vam_cov_JL)
				mata st_matrix("vam_cov",vam_cov)

				* output
				preserve
					clear
					svmat vam_cov_JL
					export delimited "${builddata}${city}_V_alpharho_`vam'_JL_new_`y'.csv", replace novarnames
					clear
					svmat vam_cov
					export delimited "${builddata}${city}_V_alpharho_`vam'_new_`y'.csv", replace novarnames
				restore

			}
	}
